#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time   : 2021/3/27 22:12
# @Author : cjw
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.keys import Keys
import time

if __name__ == '__main__':
    # Scrape the "python" job listings from lagou.com page by page and print one
    # line per posting: company, job title, location, salary, experience, education.

    # Imported here so the script body is self-contained. `By` provides the locator
    # strategies for find_element()/find_elements(); the find_element_by_* helpers
    # used previously have been removed from recent Selenium releases.
    from selenium.webdriver.common.by import By

    # Sets scrollTop of the root element (document.documentElement) to 500.
    # Equivalent assignments could target document.body, or a specific container
    # such as document.getElementsByClassName('main')[0] (the collection must be indexed).
    js_html = "var q=document.documentElement.scrollTop=500"

    # Do not show the "controlled by automated software" notice.
    options = webdriver.ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-automation'])

    driver = webdriver.Chrome(options=options)
    # Everything after the driver is created runs under try/finally so the browser
    # is always closed, including when navigation or the initial lookups fail.
    try:
        wait = WebDriverWait(driver, 20)  # polls each condition for up to 20 seconds

        driver.maximize_window()
        driver.get('https://www.lagou.com/')
        # Click the first entry of the #changeCityBox list
        # (presumably the city-selection popup; verify against the live page).
        wait.until(lambda x: x.find_element(By.XPATH, '//*[@id="changeCityBox"]/ul/li[1]/a')).click()
        # Type the keyword into the search box and submit it with ENTER.
        wait.until(lambda x: x.find_element(By.XPATH, '//*[@id="search_input"]')).send_keys('python', Keys.ENTER)

        # Read the maximum page number from the pager.
        max_page = wait.until(lambda x: x.find_element(
            By.XPATH, '//div[@class="pager_container"]/span[@class="pager_not_current"][3]')).text
        print(max_page, type(max_page))

        try:
            total_pages = int(max_page)
            for page in range(total_pages):
                print(f'------------------------第{page + 1}页开始数据提取---------------------------')
                # Find the <li> elements that hold the postings. An empty list is
                # falsy, so the wait keeps polling until at least one is present.
                li_list = wait.until(
                    lambda x: x.find_elements(By.XPATH, '//*[@id="s_position_list"]/ul/li'))
                for li in li_list:
                    job_name = li.find_element(By.TAG_NAME, 'h3').text
                    job_place = li.find_element(By.XPATH, './/a[@class="position_link"]/span/em').text
                    money = li.find_element(By.XPATH, './/span[@class="money"]').text
                    experience_education = li.find_element(By.XPATH, './/div[@class="li_b_l"]').text
                    company_name = li.find_element(By.XPATH, './/div[@class="company_name"]/a').text
                    # The text is split on '/': the last field is taken as education and
                    # everything before it is collected into the `experience` list.
                    *experience, education = [e.strip() for e in experience_education.split('/')]
                    print(company_name, job_name, job_place, money, experience, education)
                print(f'------------------------第{page + 1}页已完成数据提取---------------------------')

                # There is no further page to open after the last one.
                if page + 1 >= total_pages:
                    break

                # Scroll the page down before paging; the script is run three times
                # with a short pause after each run.
                for _ in range(3):
                    driver.execute_script(js_html)
                    time.sleep(0.5)
                # Click "next page".
                # NOTE(review): the next iteration reads the list right after this click;
                # if the site swaps the list asynchronously the previous page's elements
                # may still be matched. Consider waiting for the old list to go stale.
                wait.until(lambda x: x.find_element(
                    By.XPATH, '//div[@class="pager_container"]/span[@action="next"]')).click()
        except Exception as e:
            # Best-effort scraping: report the failure and fall through to cleanup.
            print(e)
    finally:
        driver.quit()
